package org.apache.lucene.analysis.pinyin.lucene4;

import java.io.BufferedReader;
import java.io.Reader;
import java.io.StringReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.pinyin.utils.Constant;
import org.wltea.analyzer.lucene.IKTokenizer;
/**
 * 自定义拼音分词器
 * @author Lanxiaowei
 *
 */
public class PinyinAnalyzer extends Analyzer {
	/** Minimum n-gram length. */
	private final int minGram;
	/** Maximum n-gram length. */
	private final int maxGram;
	/** Whether the IK tokenizer uses smart (coarse-grained) segmentation. */
	private final boolean useSmart;
	/** Whether Chinese terms are also n-grammed [default: false]. */
	private final boolean nGramChinese;
	/** Whether pure-number terms are also n-grammed [default: false]. */
	private final boolean nGramNumber;
	/** Whether to emit edge n-grams (prefixes only) instead of all n-grams. */
	private final boolean edgesNGram;

	/**
	 * Custom pinyin analyzer with all defaults taken from {@link Constant}.
	 */
	public PinyinAnalyzer() {
		this(Constant.DEFAULT_IK_USE_SMART);
	}

	/**
	 * Custom pinyin analyzer.
	 * @param useSmart whether the IK tokenizer uses smart segmentation
	 */
	public PinyinAnalyzer(boolean useSmart) {
		this(Constant.DEFAULT_MIN_GRAM, Constant.DEFAULT_MAX_GRAM,
				Constant.DEFAULT_EDGES_GRAM, useSmart, Constant.DEFAULT_NGRAM_CHINESE);
	}

	/**
	 * Custom pinyin analyzer.
	 * @param minGram minimum n-gram length
	 */
	public PinyinAnalyzer(int minGram) {
		this(minGram, Constant.DEFAULT_MAX_GRAM, Constant.DEFAULT_EDGES_GRAM,
				Constant.DEFAULT_IK_USE_SMART, Constant.DEFAULT_NGRAM_CHINESE,
				Constant.DEFAULT_NGRAM_NUMBER);
	}

	/**
	 * Custom pinyin analyzer.
	 * @param minGram  minimum n-gram length
	 * @param useSmart whether the IK tokenizer uses smart segmentation
	 */
	public PinyinAnalyzer(int minGram, boolean useSmart) {
		this(minGram, Constant.DEFAULT_MAX_GRAM, Constant.DEFAULT_EDGES_GRAM,
				useSmart, Constant.DEFAULT_NGRAM_CHINESE);
	}

	/**
	 * Custom pinyin analyzer.
	 * @param minGram minimum n-gram length
	 * @param maxGram maximum n-gram length
	 */
	public PinyinAnalyzer(int minGram, int maxGram) {
		this(minGram, maxGram, Constant.DEFAULT_EDGES_GRAM);
	}

	/**
	 * Custom pinyin analyzer.
	 * @param minGram    minimum n-gram length
	 * @param maxGram    maximum n-gram length
	 * @param edgesNGram whether to emit edge n-grams instead of all n-grams
	 */
	public PinyinAnalyzer(int minGram, int maxGram, boolean edgesNGram) {
		this(minGram, maxGram, edgesNGram, Constant.DEFAULT_IK_USE_SMART);
	}

	/**
	 * Custom pinyin analyzer.
	 * @param minGram    minimum n-gram length
	 * @param maxGram    maximum n-gram length
	 * @param edgesNGram whether to emit edge n-grams instead of all n-grams
	 * @param useSmart   whether the IK tokenizer uses smart segmentation
	 */
	public PinyinAnalyzer(int minGram, int maxGram, boolean edgesNGram, boolean useSmart) {
		this(minGram, maxGram, edgesNGram, useSmart, Constant.DEFAULT_NGRAM_CHINESE);
	}

	/**
	 * Custom pinyin analyzer.
	 * @param minGram      minimum n-gram length
	 * @param maxGram      maximum n-gram length
	 * @param edgesNGram   whether to emit edge n-grams instead of all n-grams
	 * @param useSmart     whether the IK tokenizer uses smart segmentation
	 * @param nGramChinese whether Chinese terms are also n-grammed [default: false]
	 */
	public PinyinAnalyzer(int minGram, int maxGram, boolean edgesNGram, boolean useSmart,
			boolean nGramChinese) {
		this(minGram, maxGram, edgesNGram, useSmart, nGramChinese,
				Constant.DEFAULT_NGRAM_NUMBER);
	}

	/**
	 * Custom pinyin analyzer (canonical constructor — all other constructors
	 * delegate here).
	 * @param minGram      minimum n-gram length
	 * @param maxGram      maximum n-gram length
	 * @param edgesNGram   whether to emit edge n-grams instead of all n-grams
	 * @param useSmart     whether the IK tokenizer uses smart segmentation
	 * @param nGramChinese whether Chinese terms are also n-grammed [default: false]
	 * @param nGramNumber  whether pure-number terms are also n-grammed [default: false]
	 */
	public PinyinAnalyzer(int minGram, int maxGram, boolean edgesNGram, boolean useSmart,
			boolean nGramChinese, boolean nGramNumber) {
		super();
		this.minGram = minGram;
		this.maxGram = maxGram;
		this.edgesNGram = edgesNGram;
		this.useSmart = useSmart;
		this.nGramChinese = nGramChinese;
		this.nGramNumber = nGramNumber;
	}

	/**
	 * Builds the analysis chain: IK tokenizer → pinyin conversion →
	 * (edge) n-gram expansion.
	 */
	@Override
	protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
		Tokenizer tokenizer = new IKTokenizer(reader, useSmart);
		// Convert tokens to pinyin.
		// NOTE(review): DEFAULT_MIN_TERM_LRNGTH looks like a misspelling of
		// "LENGTH" in the project's Constant class — confirm before renaming there.
		TokenStream tokenStream = new PinyinTokenFilter(tokenizer,
				Constant.DEFAULT_SHORT_PINYIN, Constant.DEFAULT_PINYIN_ALL,
				Constant.DEFAULT_MIN_TERM_LRNGTH);
		// Apply n-gram expansion to the pinyin tokens.
		if (edgesNGram) {
			tokenStream = new PinyinEdgeNGramTokenFilter(tokenStream, this.minGram,
					this.maxGram, this.nGramChinese, this.nGramNumber);
		} else {
			tokenStream = new PinyinNGramTokenFilter(tokenStream, this.minGram,
					this.maxGram, this.nGramChinese, this.nGramNumber);
		}
		return new Analyzer.TokenStreamComponents(tokenizer, tokenStream);
	}
}
